# Part 1: PCA with penguins

# Run a principal component analysis on the penguin size measurements.
penguin_pca <- penguins %>%
  # Keep body mass plus every column whose name ends in "_mm"
  select(body_mass_g, ends_with("_mm")) %>%
  # Discard any row that contains a missing value
  drop_na() %>%
  # Center and scale each variable so they are all on comparable scales
  scale() %>%
  # Compute the principal components
  prcomp()

# Show the loadings: the weight of each variable on each principal component
# (stored in the `rotation` element of the prcomp result)
penguin_pca[["rotation"]]
##                          PC1         PC2        PC3        PC4
## body_mass_g        0.5483502 0.084362920 -0.5966001 -0.5798821
## bill_length_mm     0.4552503 0.597031143  0.6443012 -0.1455231
## bill_depth_mm     -0.4003347 0.797766572 -0.4184272  0.1679860
## flipper_length_mm  0.5760133 0.002282201 -0.2320840  0.7837987
# Type ?scale and ?autoplot in the Console to learn more about these functions

# Make a new dataset that contains exactly the observations used to create the PCA, but still keeps the other variables (e.g., species) so they can be used in the biplot.

# Rebuild the set of rows that went into the PCA: drop any penguin missing one
# of the PCA variables, but keep every other column
penguin_complete <- drop_na(penguins, body_mass_g, ends_with("_mm"))

# PCA biplot: points coloured by species, with labelled loading arrows
autoplot(
  penguin_pca,
  data = penguin_complete, # supplies the columns used for aesthetics
  colour = "species",
  loadings = TRUE, # draw the loading arrows
  loadings.label = TRUE # label each arrow with its variable name
) +
  theme_minimal()
## Warning: `select_()` is deprecated as of dplyr 0.7.0.
## Please use `select()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

# Part 2: ggplot customization & reading in different file types

# Read in an .xlsx file & do some wrangling

# Read the landings Excel file and tidy it up.
fish_noaa <- read_excel(here("data", "foss_landings.xlsx")) %>%
  # Convert column names to lower snake case (the clean_names() default)
  clean_names() %>%
  mutate(
    # Lowercase every character column
    across(where(is.character), tolower),
    # Overwrite nmfs_name with itself minus its last three characters
    nmfs_name = str_sub(nmfs_name, end = -4)
  ) %>%
  # Keep only the records marked as public
  filter(confidentiality == "public")

# Make a customized graph:

# Line chart of pounds by year, one line per nmfs_name (legend hidden)
fish_plot <- ggplot(fish_noaa, aes(x = year, y = pounds)) +
  geom_line(mapping = aes(color = nmfs_name), show.legend = FALSE) +
  theme_minimal()

# Display the static plot
fish_plot
## Warning: Removed 6 row(s) containing missing values (geom_path).

# Convert the ggplot into an interactive plotly graph
ggplotly(p = fish_plot)